/*
 *  Copyright (C) 2011 Jaime Pavlich-Mariscal
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */



package cl.ucn.disc.biblio.refcluster.reference;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;

import cl.ucn.disc.biblio.refcluster.database.TupleOfStrings;
import cl.ucn.disc.biblio.refcluster.database.TupleOfStringsMatcher;


/**
 * Stores and parses the information of a bibliographic reference in WoS format.
 *
 * <p>A reference is handled as a comma-separated record. {@link #parse(String)}
 * recognizes, in this order, an author, a year, a journal, a volume, a page and
 * a DOI field; every field is optional and is left {@code null} when the text
 * at its position does not have the expected shape.
 *
 * <p>Equality, hashing and ordering are all based on the normalized string
 * returned by {@link #getString()}.
 *
 * @author Jaime Pavlich-Mariscal
 *
 */
public class Reference implements TupleOfStrings<Reference> {

	// The patterns are immutable, so they are compiled once for the whole class
	// instead of once per Reference instance.
	/** Four consecutive digits. */
	static final Pattern yearPattern = Pattern.compile("[0-9][0-9][0-9][0-9]");
	/** Either "*" followed by letters, digits and spaces, or up to two dotted alphanumeric words. */
	static final Pattern authorPattern = Pattern.compile("([*][A-Za-z 0-9]+|[A-Za-z][A-Za-z0-9\\.]+[ ]*[A-Za-z0-9\\.]*)");
	/** Letters, digits and spaces, optionally preceded by "*". */
	static final Pattern journalPattern = Pattern.compile("[*]?[A-Za-z 0-9]+");
	/** "V" or "v" followed by digits. */
	static final Pattern volumePattern = Pattern.compile("[Vv][0-9]*");
	/** "P" or "p" followed by digits. */
	static final Pattern pagePattern = Pattern.compile("[Pp][0-9]*");
	/** "DOI " followed by at least one character other than '$'. */
	static final Pattern doiPattern = Pattern.compile("DOI [^$]+");

	/**
	 * Matcher that considers two references equivalent when their journal
	 * sentences are similar and their volumes and pages are compatible.
	 */
	public static final TupleOfStringsMatcher<Reference> JOURNAL_VOL_PAGE_MATCHER = new TupleOfStringsMatcher<Reference>() {

		@Override
		public boolean matches(Reference r1, Reference r2) {
			return r1.journalSentence.isSimilarTo(r2.journalSentence)
					&& matches(r1.getVolume(), r2.getVolume())
					&& matches(r1.getPage(), r2.getPage());
		}

		/**
		 * Compares two optional field values.
		 *
		 * @return {@code true} if either value is {@code null} (a missing value
		 *         is compatible with anything) or if one value is a prefix of
		 *         the other, which includes the case where both are equal
		 */
		public boolean matches(String str1, String str2) {
			if (str1 == null || str2 == null) {
				return true;
			}
			return str1.startsWith(str2) || str2.startsWith(str1);
		}
	};

	/** Normalized form of the reference; basis of equals, hashCode and compareTo. */
	private String string;
	/** The reference text as it was received, before normalization. */
	private String unprocessedString;

	private String year;
	private String author;
	private String journal;
	private String volume;
	private String page;
	private String doi;

	/** Sentence built from the journal field; used by {@link #JOURNAL_VOL_PAGE_MATCHER}. */
	public Sentence journalSentence = new Sentence();
	/** Sentence built from the author field. */
	TupleOfStrings authorSentence = new Sentence();

	/** Creates an empty reference in which every field, including the string form, is {@code null}. */
	public Reference() {

	}

	/**
	 * Creates a reference by parsing a comma-separated reference string.
	 *
	 * @param ref the text to parse
	 * @throws ParseException if {@code ref} cannot be parsed; see {@link #parse(String)}
	 */
	public Reference(String ref) throws ParseException {
		parse(ref);
	}

	/**
	 * Creates a reference from its individual fields. The author is passed
	 * through {@link Author} before being stored.
	 *
	 * @throws ParseException declared for the construction of the {@link Author}
	 */
	public Reference(String author, String year, String journal, String volume, String page, String doi) throws ParseException {
		this();
		this.year = year;
		this.author = new Author(author).getString();
		this.journal = journal;
		this.volume = volume;
		this.page = page;
		this.doi = doi;
		updateSentences();
		updateStrings();
	}

	/**
	 * Replaces the author and rebuilds the sentences and both string forms of
	 * this reference from the current fields. Note that the unprocessed string
	 * is overwritten as well.
	 *
	 * @param author the new author, stored as given
	 * @throws ParseException declared for callers; sentence parsing failures are
	 *         currently reported on standard error and not propagated
	 */
	public void setAuthor(String author) throws ParseException {
		this.author = author;
		updateSentences();
		updateStrings();
	}

	/**
	 * Rebuilds {@code string} and {@code unprocessedString} by joining the
	 * fields with commas, leaving no separator behind for absent fields.
	 */
	private void updateStrings() {
		List<String> parts = new ArrayList<String>();
		parts.add(author);
		parts.add(year);
		parts.add(journal);
		parts.add(volume);
		parts.add(page);
		parts.add(doi);
		// StringUtils.join renders null elements as empty strings, so absent
		// fields show up as runs of commas that are cleaned up below.
		String joined = StringUtils.join(parts, ",");
		joined = joined.replaceAll(",+", ","); // Remove consecutive commas
		// Remove the comma left at either end; previously only the trailing one
		// was removed, so a missing author produced a string starting with ",".
		joined = joined.replaceAll("^,|,$", "");
		string = joined;
		unprocessedString = joined;
	}

	/**
	 * Re-parses the author and journal sentences from the current fields.
	 * Fields that are {@code null} are skipped.
	 */
	private void updateSentences() {
		// Each sentence is parsed on its own so that a failure on the author
		// does not leave the journal sentence out of date.
		parseQuietly(authorSentence, author);
		parseQuietly(journalSentence, journal);
	}

	/**
	 * Parses {@code text} into {@code sentence} when {@code text} is not
	 * {@code null}. This is deliberately best-effort: a parse failure is
	 * reported on standard error and otherwise ignored.
	 */
	private void parseQuietly(TupleOfStrings<?> sentence, String text) {
		if (text == null) {
			return;
		}
		try {
			sentence.parse(text);
		} catch (ParseException e) {
			e.printStackTrace();
			// do nothing
		}
	}

	public String getYear() {
		return year;
	}

	public String getAuthor() {
		return author;
	}

	public String getJournal() {
		return journal;
	}

	public String getVolume() {
		return volume;
	}

	public String getPage() {
		return page;
	}

	public String getDoi() {
		return doi;
	}

	/**
	 * Parses a comma-separated reference string into this object.
	 *
	 * <p>The fields are consumed from left to right. At each step the current
	 * field is assigned to the next attribute (author, year, journal, volume,
	 * page, DOI) only if it matches that attribute's pattern completely;
	 * otherwise the attribute is set to {@code null} and the same field is
	 * tried against the following attribute.
	 *
	 * @param str the reference text
	 * @throws ParseException if {@code str} is {@code null}, has fewer than
	 *         three comma-separated fields, or the author cannot be built
	 */
	@Override
	public void parse(String str) throws ParseException {
		if (str == null) {
			// Reject null before any state is modified instead of failing with
			// a NullPointerException half-way through.
			throw new ParseException("Reference: Cannot parse null string", 0);
		}
		this.unprocessedString = str;
		this.string = prepareRefString(str);
		// NOTE(review): the fields are taken from the raw text, so unlike
		// 'string' they are not upper-cased - confirm this is intended.
		String[] fields = str.split(",");
		for (int i = 0; i < fields.length; i++) {
			fields[i] = fields[i].trim().replaceAll("\\s+", " ");
		}
		if (fields.length < 3) {
			throw new ParseException("Reference: Cannot parse string with less than 3 fields", 0);
		}
		int col = 0;

		if (match(fields, col, authorPattern)) {
			author = new Author(fields[col]).getString();
			col++;
		} else {
			author = null;
		}

		if (match(fields, col, yearPattern)) {
			year = fields[col];
			col++;
		} else {
			year = null;
		}

		// A field that looks like a volume or a page also satisfies the journal
		// pattern, so those two shapes are excluded explicitly.
		if (match(fields, col, journalPattern) && !match(fields, col, volumePattern) && !match(fields, col, pagePattern)) {
			journal = fields[col];
			col++;
		} else {
			journal = null;
		}

		if (match(fields, col, volumePattern)) {
			volume = fields[col];
			col++;
		} else {
			volume = null;
		}

		// FIXME There are some pages that do not match the format P<number>.
		// For example: BIANCHI G,2000,IEEE J SEL AREA COMM,V18,UNSP 535547
		if (match(fields, col, pagePattern)) {
			page = fields[col];
			col++;
		} else {
			page = null;
		}

		if (match(fields, col, doiPattern)) {
			doi = fields[col];
		} else {
			doi = null;
		}
		updateSentences();
	}

	/**
	 * Normalizes a reference string: trims it, converts it to upper case,
	 * collapses runs of whitespace into one space and removes a whitespace
	 * character directly before or after a comma.
	 */
	String prepareRefString(String str) {
		return str.trim().toUpperCase().replaceAll("\\s+", " ").replaceAll(",\\s", ",").replaceAll("\\s,", ",");
	}

	/**
	 * Tells whether the field at index {@code col} exists and, once trimmed,
	 * matches {@code p} over its whole length.
	 */
	private boolean match(String[] strArray, int col, Pattern p) {
		return col < strArray.length && p.matcher(strArray[col].trim()).matches();
	}

	@Override
	public String toString() {
		return string;
	}

	@Override
	public int hashCode() {
		return 31 + ((string == null) ? 0 : string.hashCode());
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		Reference other = (Reference) obj;
		return (string == null) ? other.string == null : string.equals(other.string);
	}

	/**
	 * Orders references by their normalized string. A reference whose string
	 * is {@code null} (for example one created with the default constructor)
	 * sorts before any reference that has a string, which keeps the ordering
	 * consistent with {@link #equals(Object)} instead of failing.
	 */
	@Override
	public int compareTo(Reference o) {
		if (string == null) {
			return (o.string == null) ? 0 : -1;
		}
		if (o.string == null) {
			return 1;
		}
		return string.compareTo(o.string);
	}

	public String getString() {
		return string;
	}

	@Override
	public String getUnprocessedString() {
		return unprocessedString;
	}

}
